* Session setup: start from a clean state and raise the matrix-size limit.
clear all
set matsize 4000
*set type double, permanently

* Project directories -- fill in the paths before running.
*   dat  : data directory (the only one referenced further down in this file)
*   do, text, texg : set here for use elsewhere
global dat  ""
global do   ""
global text ""
global texg ""

* max: overwrite a variable with its maximum within id.
*   Usage:  max varname
*   The data in memory must contain the variable id. On exit, varname is a
*   newly created variable holding egen's max() of the old values, computed
*   separately for each id (the data are sorted by id as a side effect).
*   The group maximum is built in a temporary variable and only then swapped
*   in, so the program neither depends on the name "varname_" being free nor
*   on varname being short enough to take a suffix, and the original column
*   is left untouched if egen fails.
cap pr drop max
pr de max
	args v
	tempvar grpmax
	bys id: egen `grpmax' = max(`v')
	drop `v'
	* renaming the temporary variable makes it permanent under the old name
	rename `grpmax' `v'
end

* Load the starting data set.
use "$dat/ZEMIS_start.dta", clear


**********************
*** restrict sample **
**********************
summarize year if yearN==1 //  599 447
* keep fyear 2007-2015 only (observations with missing fyear are dropped too)
keep if inrange(fyear, 2007, 2015) // to speed up


* calculate average number of cases per day
* (cases in 2008-2013 with a non-missing cantass, spread over 52 weeks of 5 days)
local n_years = 2013-2008+1
summarize year if yearN==1 & inrange(year, 2008, 2013) & !mi(cantass)
display ((r(N)/`n_years')/52)/5 // 60 cases per day (excluding weekends)


********************************
**  Add coworker information  **
********************************

* Attach the three workplace statistics files by id and year.
* Only observations already in memory are kept (using-only rows are discarded),
* and no _merge indicator is left behind.
foreach x in conat coeth colan {
	merge 1:1 id year using "$dat/workplace_stat_`x'.dta", keep(master match) nogenerate
}


*************************************************
**** add co-nationals/co-ethnics/co-linguistic **
*************************************************

* For every combination of
*   t = 1 (merge on fyear as stored) / 2 (merge on fyear+1)
*   q = age / all (which canton file is used)
* this block creates
*   mig_q_t, fmig_q_t                  : the mig / fmig columns of the canton files
*   match_bcperm_q_t, match_fperm_q_t  : the value of the c<country> column that
*                                        belongs to the person's own country
* The canton files are wide: one column c<code> per country code.

* add number of conationals with B- or C-permit (based on PETRA/STATPOP)
foreach t in 1 2 {
	if `t'==2 replace fyear=fyear+1 //to have an alternative measure that merges on enclave in assignment year instead of year t-1
	foreach q in age all {
		if "`q'"=="age" merge m:1 cantass fyear using "$dat/cant_`q'.dta"
		else merge m:1 cantass fyear using "$dat/cant_`q'.dta", keepusing(c* mig)
		ren mig mig_`q'_`t'

		drop if _merge==2
		drop _merge

		g match_bcperm_`q'_`t'=0 if !mi(cantass) & !mi(country) & country!=998 & country!=-1 & year!=1994 // if no one from country in canton the previous year

		levelsof country, local(clist)
		foreach c of local clist {
			* Some codes (e.g. 998) have no column in the canton file. Test for the
			* column explicitly instead of capturing the replace: "exact" stops
			* Stata from resolving c<code> as an abbreviation of another column,
			* and genuine errors in the replace are no longer hidden.
			cap confirm variable c`c', exact
			if _rc==0 {
				qui replace match_bcperm_`q'_`t'=c`c' if country==`c'
			}
		}
		* remove the wide country columns again before the next merge
		drop c2* c3* c4* c5* c6*
		cap drop sum_c*

		* add number of people with f-permit in canton (based on ZEMIS)
		merge m:1 cantass fyear using "$dat/cant_fperm_`q'.dta"
		drop if _merge==2
		drop _merge

		ren fmig fmig_`q'_`t'
		g match_fperm_`q'_`t'=0 if !mi(cantass) & !mi(country) & country!=998 &  country!=-1 & year!=1994

		levelsof country, local(clist)
		foreach c of local clist {
			* same explicit existence check as above (not every country has a column)
			cap confirm variable c`c', exact
			if _rc==0 {
				qui replace match_fperm_`q'_`t'=c`c' if country==`c'
			}
		}
		drop c2* c3* c4* c5*
	}
	* undo the temporary shift of fyear
	if `t'==2 replace fyear=fyear-1
}
ren mig_age_1 mig
ren fmig_age_1 fmig


*add number of conationals and coethnics based on zemis data 
*only defined for age 18-65 and measured in year t-1

* For x = ethn / spr this creates ematch / lmatch (the value of the e<code>
* column matching the person's own x_cd) and lnematch / lnlmatch = ln(match+1).
* NOTE(review): the count columns are read as e<code> for BOTH files -- confirm
* that cant_num_spr.dta also names its columns with the e prefix; if it does
* not, lmatch silently stays at its initial value.
foreach x in ethn spr  {
	merge m:1 cantass fyear using "$dat/cant_num_`x'.dta"
	drop if _merge==2 // years not in 2007-2017 interval
	drop _merge
	if "`x'"=="ethn" loc t e
	if "`x'"=="spr" loc t l
	g `t'match=0 if !mi(cantass)  & `x'_cd!=-1 & `x'_cd!=998 & year!=1994

	levelsof `x'_cd, local(clist)
	foreach c of local clist {
		* Not every code has a column. Check for it explicitly ("exact" rules
		* out abbreviation matches such as e1 -> e10) rather than capturing the
		* replace, so real errors are not swallowed.
		cap confirm variable e`c', exact
		if _rc==0 {
			qui replace `t'match=e`c' if `x'_cd==`c'
		}
	}

	* Drop the wide e<digit>* columns one prefix at a time. A single
	* "drop e1* ... e9*" aborts without dropping anything as soon as one
	* pattern has no match, and leftover columns would take precedence over
	* the same-named columns of the next file merged in.
	forvalues d=1/9 {
		cap drop e`d'*
	}
	g ln`t'match=ln(`t'match+1) if !mi(`t'match)
}


***************************************************
**** create new variables measuring co-national ***
***************************************************

* f-people from Zemis and B and C from petra/statpop, N not included
generate match         = match_bcperm_age_1 + match_fperm_age_1
generate match_all     = match_bcperm_all_1 + match_fperm_all_1
generate match_current = match_bcperm_age_2 + match_fperm_age_2 // same year as assigned

* Flag ids that have ygetF==0 or ygetBC==0 in the row with yearN==1, and spread
* the flag to all rows of the id with the max program defined earlier in this file.
generate get_first_year = 1 if yearN==1 & (ygetF==0 | ygetBC==0)
max get_first_year
* only remove own person if get permit first year
replace match_current = match_current - 1 if get_first_year==1 & inrange(age_arrive, 18, 65) & !mi(match_current)
drop get_first_year
*sum match_current  // no neg values

* ln(x+1) of each measure; missing input gives missing output
foreach v in match match_current match_all {
	generate ln`v' = ln(`v' + 1)
}

replace mig = mig + fmig - match // remove own group
*sum mig // no neg values
generate lnmig = ln(mig)

* the component counts are no longer needed
drop match_bc* match_fp*

*****************************************************************************
**** add info canton unemployment/population and share ethnicity/language  **
*****************************************************************************

* Population by cant_res and year; using-only rows are discarded.
merge m:1 cant_res year using  "$dat/pop_all.dta", keepusing(pop) keep(master match) nogenerate
destring pop, replace

* Unemployment by cant_res and year.
merge m:1 cant_res year using  "$dat/unemp.dta", keepusing(unemp) keep(master match) nogenerate


* Shares by country, canton, fyear and ethnicity (ethn) / language (spr) code.
foreach x in ethn spr {
	merge m:1 country cantass fyear `x'_cd using "$dat/file_`x'_share.dta", keep(master match) nogenerate
	rename share share_`x'
	rename share_alt share_alt_`x'
	* gets missing value when there is noone with that combination in the previous year
	replace share_`x' = 0 if mi(share_`x') & !mi(country) & !mi(cantass) & !inlist(`x'_cd, -1, 998) & year!=1994
}

*********************************
** restrict to relevant sample **
*********************************
* wunschkanton available from april 2008 (not full coverage before)
keep if !mi(fmcase_begin) & fmcase_begin>200803
* exclude hardship-cases
keep if !mi(wait_perm) & wait_perm<1826
* if missing, assignment to canton don't happen within expected time frame
drop if mi(cantreq)
* necessary covariates
drop if mi(fcenter, farrive)

* drop if change birth date
keep if born_sd==. | born_sd==0


* variables carried into the final file
keep gap_day  multiple_cant_tot german_info  move_later woman farrive age ///
	lnwait* wait* bur_id id id_orig year* now* nocom* ///
	ethn_cd reli_cd spr_cd country share_* age_ar* fmarried fcenter ///
	cantass fcase_begin unemp_ass mig ln* match* ///
	colan* coeth* conat* uniq* french* country_born ///
	emp emp90 indu* cantnum cantreq with_ch* fams* getF

save "$dat/ZEMIS_final.dta", replace







